(* Nat Mote
 *
 * Copyright (C) 2023 Semgrep Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * version 2.1 as published by the Free Software Foundation, with the
 * special exception on linking described in file LICENSE.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the file
 * LICENSE for more details.
 *)

open AST_generic
module G = AST_generic
module H = AST_generic_helpers

(* Provides a subclass of the autogenerated iter visitor with extensions used
 * for matching in Semgrep. There is no mli file (as recommended in
 * http://gallium.inria.fr/~fpottier/visitors/manual.pdf) because the type of a
 * visitor is tedious to describe.
 *
 * TODO Should this move into src? *)

type 'a visitor_env = {
  vardef_assign : bool;
  flddef_assign : bool;
  attr_expr : bool;
  implicit_return : bool;
  extra : 'a;
}

(* NOTE: we do a few subtle things at a few places now for semgrep to trigger a
 * few more artificial visits:
 *  - we call vardef_to_assign (if `vardef_assign` is `true`)
 *  - we generate partial defs on the fly and call kpartial
 *  - we call v_expr on nested XmlXml to give the chance for an
 *    Xml pattern to also be matched against nested Xml elements
 *
 * old: We used to apply the VarDef-Assign equivalence by default, but this was
 * error prone because visitors typically do side-effectful things and VarDefs
 * were visited twice (as a VarDef and as an Assign), thus repeating
 * side-effects, leading to surprises.
 *)
class ['self] matching_visitor =
  object (self : 'self)
    inherit ['self] AST_generic.iter_no_id_info as super

    (********************************************************************)
    (* Helpers *)
    (********************************************************************)

    (* The recurse argument is subtle. It is needed because we want different
     * behaviors depending on the context:
     * - in some context we want to recurse, for example when we call ii_of_any
     *   (Partial ...), we want to get all the tokens in it
     * - in other context we do not want to recurse, because that would mean we
     *   would visit two times the same function (one with a body, and one
     *   without a body), which can lead some code, e.g., Naming_AST, to
     *   generate intermediate sids which in turn lead to regressions in
     *   end-2-end tests (because the value of sid differ).
     * This is why when we are called from visit_partial (which is called by the
     * generated visit_any), we recurse, but when we are called from e.g.
     * visit_definition, where we construct a new PartialDef, we don't.
     *)
    method v_partial ~recurse env x = if recurse then super#visit_partial env x

    method v_named_attr_as_expr env name args =
      (* A named attribute is essentially a function call, but this is not
       * explicit in Generic so we cannot match expression patterns against
       * attributes. This equivalence enables exactly that, and we can e.g.
       * match `@f(a)` with `f($X)`. *)
      if env.attr_expr then self#visit_expr env (e (Call (e (N name), args)))

    method v_stmts env xs =
      match xs with
      | [] -> ()
      | x :: xs ->
          self#visit_stmt env x;
          (* we will call the visitor also on subsequences. This is useful
           * for semgrep *)
          self#v_stmts env xs

    method v_fields env xs =
      (* As opposed to v_stmts, we don't call v_fields recursively for sublists
       * of xs. Indeed, in semgrep, fields are matched in any order so calling
       * the visitor and matcher on the entire list of fields should also work.
       * *)
      self#visit_list self#visit_field env xs

    method v_vardef_as_assign_expr env ventity =
      function
      | VarDef ({ vinit = Some _; _ } as vdef) when env.vardef_assign ->
          (* A VarDef is implicitly a declaration followed by an assignment
           * expression, so we should visit the assignment expression as well.
           *
           * Note that we cannot treat this as a simple equivalence later, as
           * expressions are visited separately from statements.
           *
           * This feels a bit hacky here, so let's take a TODO to improve this
           *)
          self#visit_expr env (H.vardef_to_assign (ventity, vdef))
      | _ -> ()

    method v_flddef_as_assign_expr env ventity =
      function
      (* No need to cover the VarDef({vinit = Some _; )} case here. It will
       * be covered by v_vardef_as_assign_expr at some point when v_field
       * below call v_stmt (which itself will call v_def).
       *
       * In certain languages like Javascript, some method definitions look
       * really like assignements, so we would like an expression pattern like
       * '$X = function() { ...}' to also match code like
       * 'class Foo { x = function() { return; } }'.
       *)
      | FuncDef fdef when env.flddef_assign ->
          let resolved = Some (LocalVar, G.SId.unsafe_default) in
          self#visit_expr env (H.funcdef_to_lambda (ventity, fdef) resolved)
      | _ -> ()

    (* WEIRD: not sure why, but using this code below instead of
     * the v_def_as_partial above cause some regressions.
     *
     *  (* calling kpartial with a modified def *)
     *  (match x with
     *  | ent, FuncDef def ->
     *     let partial_def = { def with fbody = empty_fbody } in
     *     v_partial (PartialDef (ent, FuncDef partial_def))
     *  | _ -> ()
     *  )
     *)
    method v_def_as_partial env ent defkind =
      (* calling kpartial with a modified def *)
      match defkind with
      | FuncDef def ->
          let partial_def = { def with fbody = FBNothing } in
          self#v_partial ~recurse:false env
            (PartialDef (ent, FuncDef partial_def))
      | ClassDef def ->
          let partial_def = { def with cbody = empty_body } in
          self#v_partial ~recurse:false env
            (PartialDef (ent, ClassDef partial_def))
      | _ -> ()

    method v_expr_as_return_stmt env e =
      (* An implicit return is an expression whose value is being returned
       * from a function but without the return keyword. This is not explicit
       * in Generic so we cannot match statement patterns against these
       * expressions. This equivalence enables that.
       *
       * We need an equivalence here, and we also need support in Generic_vs_generic
       * in order to handle 2 different cases.
       *
       * The equivalence here will result in comparing
       *   return $X
       * against
       *   return expr
       * in Generic_vs_generic.
       *
       * On the other hand, in Generic_vs_generic, we would like to match
       *   return $X
       * against
       *   expr_stmt
       * which can happen when return $X and expr_stmt is nested within
       * some context.
       *
       * Alt: we could add more special cases in Generic_vs_generic to handle
       * things like FBExpr (which is not visited when matching the pattern
       ' `return`, because FBExpr is an expression, not a statement), but the
       * equivalence feels more natural here, and it allows us to keep
       * the cases in Generic_vs_generic not too complicated.
       *)
      if env.implicit_return && e.is_implicit_return then
        let ret = Return (fake "return", Some e, sc) |> s in
        self#visit_stmt env ret

    (*********************************************************************)
    (* Overrides:
     *
     * These implement matching-specific behavior such as generating partial
     * nodes.
     * *)
    (*********************************************************************)

    method! visit_xml_body env xml =
      match xml with
      | XmlXml v1 ->
          (* subtle: old: let v1 = v_xml v1 in ()
           * We want a simple Expr (Xml ...) pattern to also be matched
           * against nested XmlXml elements *)
          self#visit_expr env (Xml v1 |> G.e)
      | _else_ -> super#visit_xml_body env xml

    method! visit_expr env x =
      match x.e with
      | Container (v1, v2) ->
          (match v1 with
          (* less: could factorize with case below by doing List|Dict here and
           * below in Tuple a String|Id
           *)
          | Dict ->
              v2 |> Tok.unbracket
              |> List.iter (fun e ->
                     match e.e with
                     | Container
                         ( Tuple,
                           (tok, [ { e = L (String (_, id, _)); _ }; e ], _) )
                       ->
                         let t = Tok.fake_tok tok ":" in
                         self#v_partial ~recurse:false env
                           (PartialSingleField (id, t, e))
                     | _ -> ())
          (* for Go where we use List for composite literals.
           * TODO? generate Dict in go_to_generic.ml instead directly?
           *)
          | List ->
              v2 |> Tok.unbracket
              |> List.iter (fun e ->
                     match e.e with
                     | Container
                         (Tuple, (tok, [ { e = N (Id (id, _)); _ }; e ], _)) ->
                         let t = Tok.fake_tok tok ":" in
                         self#v_partial ~recurse:false env
                           (PartialSingleField (id, t, e))
                     | _ -> ())
          | _ -> ());
          let _v1 = self#visit_container_operator env v1
          and _v2 =
            self#visit_bracket (self#visit_list self#visit_expr) env v2
          in
          ()
      | _else -> super#visit_expr_kind env x.e

    method! visit_argument env x =
      (match x with
      | ArgKwd (v1, v2) ->
          let tok = snd v1 in
          let t = Tok.fake_tok tok ":" in
          self#v_partial ~recurse:false env (PartialSingleField (v1, t, v2))
      | _else -> ());
      super#visit_argument env x

    method! visit_todo_kind env x =
      (* bugfix: do not call visit_ident here, otherwise code like
       * Analyze_pattern might consider the string for -filter_irrelevant_rules
       *)
      let _str, tok = x in
      self#visit_tok env tok

    method! visit_attribute env x =
      (match x with
      | NamedAttr (_, v1, v2) -> self#v_named_attr_as_expr env v1 v2
      | _else -> ());
      super#visit_attribute env x

    method! visit_case_and_body env x =
      self#v_partial ~recurse:false env (PartialSwitchCase x);
      super#visit_case_and_body env x

    method! visit_stmt env x =
      (match x.s with
      | If (t, Cond v1, _v2, _v3) ->
          self#v_partial ~recurse:false env (PartialIf (t, v1))
      | Switch (v0, Some (G.Cond v1), _v2) ->
          self#v_partial ~recurse:false env (PartialMatch (v0, v1))
      | Try (t, v1, _v2, _v3, _v4) ->
          self#v_partial ~recurse:false env (PartialTry (t, v1))
      | ExprStmt (v1, _) -> self#v_expr_as_return_stmt env v1
      | _else -> ());
      (* todo? visit the s_id too? *)
      super#visit_stmt_kind env x.s

    method! visit_catch env x =
      self#v_partial ~recurse:false env (PartialCatch x);
      super#visit_catch env x

    method! visit_finally env x =
      let t, v = x in
      self#v_partial ~recurse:false env (PartialFinally (t, v));
      super#visit_finally env x

    method! visit_definition env x =
      let v1, v2 = x in
      self#v_vardef_as_assign_expr env v1 v2;
      self#v_def_as_partial env v1 v2;
      super#visit_definition env x

    method! visit_partial env x = self#v_partial ~recurse:true env x

    method! visit_function_definition env x =
      self#v_partial ~recurse:false env (PartialLambdaOrFuncDef x);
      super#visit_function_definition env x

    method! visit_function_body env x =
      (match x with
      | FBExpr v1 -> self#v_expr_as_return_stmt env v1
      | _else_ -> ());
      super#visit_function_body env x

    method! visit_field env x =
      match x with
      | F v1 ->
          (match v1.s with
          | DefStmt
              ( { name = EN (Id (id, _)); _ },
                FieldDefColon { vinit = Some e; _ } ) ->
              let t = Tok.fake_tok (snd id) ":" in
              self#v_partial ~recurse:false env (PartialSingleField (id, t, e))
          | DefStmt (ent, def) -> self#v_flddef_as_assign_expr env ent def
          | _ -> ());
          self#visit_stmt env v1

    method! visit_class_definition env
        { ckind; cextends; cimplements; cmixins; cparams; cbody } =
      (* This is handcoded so that we can call v_fields which calls the
       * client-supplied visitor function *)
      self#visit_wrap self#visit_class_kind env ckind;
      self#visit_list self#visit_class_parent env cextends;
      self#visit_list self#visit_type_ env cimplements;
      self#visit_list self#visit_type_ env cmixins;
      self#visit_parameters env cparams;
      self#visit_bracket self#v_fields env cbody

    (* Overrides to call v_fields instead of the autogenerated code which just
     * repeatedly calls visit_field. We could instead define a `fields` type in
     * AST_generic, use it instead of `field list` wherever we want this
     * behavior, and override `visit_fields`. *)
    method! visit_Record env v1 = self#visit_bracket self#v_fields env v1

    method! visit_TyRecordAnon env v0 v1 =
      self#visit_wrap self#visit_class_kind env v0;
      self#visit_bracket self#v_fields env v1

    method! visit_AndType env v1 = self#visit_bracket self#v_fields env v1
    method! visit_Flds env v1 = self#v_fields env v1

    (* Overrides to call v_stmts instead of the autogenerated code which just
     * repeatedly calls visit_stmt. We could instead define a `stmts` type in
     * AST_generic, use it instead of `stmt list` wherever we want this visitor
     * behavior, and override `visit_stmts`. *)

    method! visit_Block env v1 = self#visit_bracket self#v_stmts env v1

    method! visit_ModuleStruct env v1 v2 =
      self#visit_option self#visit_dotted_ident env v1;
      self#v_stmts env v2

    method! visit_program env v1 = self#v_stmts env v1
    method! visit_Ss env v1 = self#v_stmts env v1
  end

(* To keep the type simple, `extra` must be explicitly specified *)
let mk_env ?(vardef_assign = false) ?(flddef_assign = false)
    ?(attr_expr = false) ?(implicit_return = false) extra =
  { vardef_assign; flddef_assign; attr_expr; implicit_return; extra }
